clear
clear matrix
set more off

* ------------------------------------------------------------------------------
* set paths
global root "[include path here]"
global input "$root/Input"
global output "$root/Output"
global intermediate "$root/Intermediate"
global figures "$root/Figures"

* ______________________________________________________________________________
* REGRESSIONS

use "$input/eventStudyPanel_raw.dta", clear
keep if conversions == 0 | (conversions == 1 & wg == "SF")

* merge application information into panel
sort pid year month
merge m:1 pid using "$input/applications.dta"
sort pid year month
drop if _merge == 2
tab _merge conversions

* generate pre-period indicator
gen pre = 0
replace pre = 1 if ((year == year(appdate) & month >= month(appdate)) | (year > year(appdate))) & tau < 0

* generate zero consumption indicator
by pid: egen minwuse = min(wuse)
gen zero = 0
replace zero = 1 if minwuse == 0
la var zero "1 if parcel contains zero water use anywhere"

* define local fixed effect specification variables
local fe_pmFEymc2FE "i.pid#i.month i.year#i.month#i.cohort2"
local fe "pmFEymc2FE"

****** interaction: year-cohort2 parcel-month (chi-by-year)
reghdfe wuse pre chiBY1998 chiBY1999 chiBY2000 chiBY2001 chiBY2002 chiBY2003 chiBY2004 chiBY2005 chiBY2006 chiBY2007 ///
chiBY2008 chiBY2009 chiBY2010 chiBY2011 chiBY2012 chiBY2013 chiBY2014, a(`fe_`fe'') pool(1) vce(cl pid)
	display "command line: `e(cmdline)'"
	estimates save "$output/did_`fe'chiBYyr.ster", replace
	
	preserve // PARTICIPATION PLOTS
	keep if e(sample)
	duplicates drop pid, force
		
	tabstat conv_sqft, stat(n mean sd min max) by(event_month) columns(statistics) f(%9.0gc)
	collapse (count) nconv = conv_sqft nrebate = tot_rebated ///
	(mean) meanconv = conv_sqft  ///
	(sd) sdconv = conv_sqft ///
	(semean) seconv = conv_sqft ///
	(min) minconv=conv_sqft ///
	(max) maxconv = conv_sqft, by(event_year)
	sort event_year
	save "$output/did_`fe'chiBYyr_conversions.dta", replace
	restore

set more off
local  fe_pmFEymc2FE "i.pid#i.month i.year#i.month#i.cohort2"
local fe "pmFEymc2FE"
drop _merge
	
****** interaction: chi-by-preTreatment consumption [ptc]
** pre-treatment (i.e. baseline) consumption decile analysis
local yti "number of conversions"
drop chiBY*
drop landuse lotsize landsf minconv ppsf earlyx

* define annual pre-treatment consumption using 12 x n monthly average (starting 1 year out)
preserve
foreach n of numlist 2/5 {
	local mintau = -12*`n'
	by pid: egen ptc`n'_nrm = mean(wuse_norm) if inrange(tau,`mintau',-13)
	}
keep if conversions == 1
keep if tau == -13
duplicates report pid

* define deciles of pre-treatment consumption decile (ptcd) for each 'n' (2 though 5)
foreach n of numlist 2/5 {
	xtile ptcd`n'_nrm = ptc`n'_nrm, nquantiles(10)
	la var ptcd`n'_nrm "pre-treatment (t-`n' to t-1 yr) average normalized consumption decile"
	_pctile ptc`n'_nrm, percentiles(10(10)90)
	display "return normalized _pctile list: `n'"
	return list // lists maximum of the nth decile: r(r1) = x means the top of the 0-10th decile
	}
keep pid ptcd*
compress
sort pid
save "$intermediate/temp_ptcd.dta", replace
restore 

tab conversions wg, missing
sort pid year month
merge m:1 pid using "$intermediate/temp_ptcd.dta"
sort pid year month
tab conversions _merge // sanity check
drop _merge
set more off
rm "$intermediate/temp_ptcd.dta"

* define conversion area decile indicators and create chi x indicator interactions
foreach n of numlist 2/5 {
	foreach d of numlist 1/10 {
		gen byte ptcd`n'dec`d'_nrm = 0
		replace ptcd`n'dec`d'_nrm = 1 if ptcd`n'_nrm == `d'
		gen byte chiBYptcd`n'd`d'_nrm = chi * ptcd`n'dec`d'_nrm
		drop ptcd`n'dec`d'_nrm
		}
	}

** regression

foreach n of numlist 2/5 {
display "==================== Year t-`n' to t-1 (i.e. t-1 = tau = -13) ===================="

reghdfe wuse_norm pre chiBYptcd`n'd1_nrm chiBYptcd`n'd2_nrm ///
chiBYptcd`n'd3_nrm chiBYptcd`n'd4_nrm chiBYptcd`n'd5_nrm chiBYptcd`n'd6_nrm ///
chiBYptcd`n'd7_nrm chiBYptcd`n'd8_nrm chiBYptcd`n'd9_nrm chiBYptcd`n'd10_nrm, ///
a(`fe_`fe'') pool(1) vce(cl pid)
	display "command line: `e(cmdline)'"
	local regid "normQ`fe'chiBYptcd`n'"
	estimates save "$output/did_`regid'.ster", replace
	

	preserve // PARTICIPATION PLOTS
	keep if e(sample)
	duplicates drop pid, force

	collapse (count) nconv = conv_sqft nrebate = tot_rebated ///
	(mean) meanconv = conv_sqft  ///
	(sd) sdconv = conv_sqft ///
	(semean) seconv = conv_sqft ///
	(min) minconv=conv_sqft ///
	(max) maxconv = conv_sqft, by(ptcd`n'_nrm)
	sort ptcd`n'_nrm
	save "$output/did_`regid'_conversions.dta", replace
	restore
}

* ______________________________________________________________________________
* CREATE FIGURES

* ==============================================================================
* figure 4a
* ==============================================================================
clear
set more off

local model "pmFEymc2FEchiBYyr"
local multiplier = -1000

local moyr = 12 // if you want gal/sq-ft/month
local persftitle "average water savings [gal/ft{sup:2}/year]"

* place coefficient estimates in stata data file
clear
estimates use "$output/did_`model'.ster" 
mat result = e(b)', vecdiag(e(V))'

svmat result, names(matcol)
drop if _n == 1
gen rse = sqrt(resultr1)

gen event_year = 1997
replace event_year = event_year + _n
rename resulty1 beta
rename resultr1 diag_vcm
order event_year
sort event_year

* merge coefficient estimates with conversion size summary data file
merge 1:1 event_year using "$output/did_`model'_conversions.dta"
keep if _merge==3
drop _merge
sort event_year

* generate plot variables
gen savings = `multiplier'*beta
gen savings_rse = `multiplier' * rse
gen savelb = savings - 1.96 * savings_rse
gen saveub = savings + 1.96 * savings_rse

gen unitsave = beta*-1000*`moyr'/meanconv
gen unitSE = ((`moyr'*1000)/meanconv)*rse
gen unitsave_lb = unitsave - 1.96*unitSE
gen unitsave_ub = unitsave + 1.96*unitSE

gen snwa1 = 55.8
gen snwa2 = 54.7

gr tw sc unitsave unitsave_lb unitsave_ub snwa1 snwa2 event_year ///
if event_year>=2000, c(l l l l l) lpattern(solid dash dash shortdash shortdash) ///
lc(gs0 gs7 gs7 gs10 gs10) m(O oh oh i i) mc(gs0 gs7 gs7 gs10 gs10) ///
ytitle("`persftitle'") xtitle("") xlabel(2000(1)2014) /// 
 ylabel(, nogrid) graphregion(color(white)) xsize(8) ysize(5) legend(off)
graph export "$figures/figure_4a.pdf", replace

* ==============================================================================
* figure 5a
* ==============================================================================
clear
set more off

local decile "ptcd2_nrm"  
local model "normQpmFEymc2FEchiBYptcd2"  
local xti "pre-enrollment water use decile"
local ytitle "normalized average water savings [gal/ft{sup:2}-month]"
local multiplier = -1

* place coefficient estimates in stata data file
clear
estimates use "$output/did_`model'.ster" 
mat result = e(b)', vecdiag(e(V))'

svmat result, names(matcol)
drop if _n == 1
gen rse = sqrt(resultr1)

gen `decile' = _n
rename resulty1 beta
rename resultr1 diag_vcm
order `decile'
sort `decile'

merge 1:1 `decile' using "$output/did_`model'_conversions.dta"
keep if _merge==3
drop _merge
sort `decile'

* generate plot variables
gen savings = `multiplier'*beta
gen savings_rse = `multiplier' * rse
gen savelb = savings - 1.96 * savings_rse
gen saveub = savings + 1.96 * savings_rse

* graph of savings and unit savings
gr tw sc savings savelb saveub `decile', c(l l l) lpattern(solid dash dash) ///
lc(gs0 gs7 gs7) m(O oh oh) mc(gs0 gs7 gs7) ytitle("`ytitle'") xtitle("`xti'") ///
xlabel(1(1)10) ylabel(, nogrid) graphregion(color(white)) xsize(8) ysize(5) ///
legend(off)
graph export "$figures/figure_5a.pdf", replace